# ============================== SETUP ===============================
rm(list = ls())
set.seed(1)
options(scipen = 999)
setwd("~/Dropbox/Wayne-Ying/White_Nationalist_Recruitment/replication/codes")
library(tidyverse)
library(data.table)
library(dplyr)
library(ggplot2)
library(vars)
library(gridExtra)

# ========================= DATA WRANGLING ==========================
# ---- tweets ----
tweets <- fread("../datasets/input/tweets.csv", stringsAsFactors = FALSE)
tweets <- tweets[tweets$RelevantLogit==1,]
table(tweets$ads_dict)
tweets <- tweets[tweets$ads_dict==0,]

tweets$race_dict        <- ifelse(tweets$race_dict >= 2, 1, 0)
tweets$gender_dict      <- ifelse(tweets$gender_dict >= 2, 1, 0)
tweets$nationalism_dict <- ifelse(tweets$nationalism_dict >= 2, 1, 0)
tweets$partisan_dict    <- ifelse(tweets$partisan_dict >= 2, 1, 0)
tweets$religion_dict    <- ifelse(tweets$religion_dict >= 2, 1, 0)

colnames(tweets)
tweets$date  <- as.Date(tweets$created_at)
table(tweets$religion_dict)
table(tweets$seedfollow)
tweets$group <- ifelse(tweets$seedfollow == 999, "leader", "follower")
table(tweets$group)

tweets <- tweets[, c("date","group","gender_dict","nationalism_dict","partisan_dict","race_dict","religion_dict")]
tweets <- tweets %>%
  group_by(date, group) %>%
  summarize(Religion    = mean(religion_dict,    na.rm = TRUE),
            Race        = mean(race_dict,        na.rm = TRUE),
            Nationalism = mean(nationalism_dict, na.rm = TRUE),
            Partisan    = mean(partisan_dict,    na.rm = TRUE),
            Gender      = mean(gender_dict,      na.rm = TRUE))

tweets <- tweets %>%
  pivot_wider(names_from = group, values_from = c(Religion, Race, Nationalism, Partisan, Gender))
tweets <- tweets[order(tweets$date), ]
tweets$platform <- 1

# ---- gabs ----
gabs <- fread("../datasets/input/gabs.csv", stringsAsFactors = FALSE)
gabs <- gabs[gabs$RelevantLogit==1,]
table(gabs$ads_dict)
gabs <- gabs[gabs$ads_dict==0,]

gabs$race_dict        <- ifelse(gabs$race_dict >= 2, 1, 0)
gabs$gender_dict      <- ifelse(gabs$gender_dict >= 2, 1, 0)
gabs$nationalism_dict <- ifelse(gabs$nationalism_dict >= 2, 1, 0)
gabs$partisan_dict    <- ifelse(gabs$partisan_dict >= 2, 1, 0)
gabs$religion_dict    <- ifelse(gabs$religion_dict >= 2, 1, 0)

colnames(gabs)
gabs$date  <- as.Date(gabs$created_at)
table(gabs$religion_dict)
table(gabs$seedfollow)
gabs$group <- ifelse(gabs$seedfollow == 999, "leader", "follower")
table(gabs$group)

gabs <- gabs[, c("date","group","gender_dict","nationalism_dict","partisan_dict","race_dict","religion_dict")]
gabs <- gabs %>%
  group_by(date, group) %>%
  summarize(Religion    = mean(religion_dict,    na.rm = TRUE),
            Race        = mean(race_dict,        na.rm = TRUE),
            Nationalism = mean(nationalism_dict, na.rm = TRUE),
            Partisan    = mean(partisan_dict,    na.rm = TRUE),
            Gender      = mean(gender_dict,      na.rm = TRUE))

gabs <- gabs %>%
  pivot_wider(names_from = group, values_from = c(Religion, Race, Nationalism, Partisan, Gender))
gabs <- gabs[order(gabs$date), ]
gabs$platform <- 0

# ---- combine ----
comb <- rbind(gabs, tweets)
comb <- comb[order(comb$date), ]
comb <- comb[comb$date >= as.Date("2016-08-15") & comb$date <= as.Date("2021-05-15"), ]
comb <- na.omit(comb)
colnames(comb)
comb <- comb[, c("date","Religion_follower","Religion_leader","Race_follower","Race_leader",
                 "Nationalism_follower","Nationalism_leader","Partisan_follower","Partisan_leader",
                 "Gender_follower","Gender_leader","platform")]
comb <- ts(comb)
rm(list = c("gabs","tweets"))

# ============================ VAR ROBUSTNESS (lags 2–10) ===========================
result_list <- list()
gendercheck <- NULL
partycheck  <- NULL

for (lag in 2:10) {
  var_model_merged    <- VAR(y = comb[, 2:11], p = lag, exogen = comb[, 12])
  var_irfs_cum_merged <- irf(var_model_merged, n.ahead = 20, cumulative = TRUE)
  
  # ---- extract IRFs ----
  var_irfs <- var_irfs_cum_merged
  variables <- names(var_irfs$irf)
  elements_to_pull <- c("irf","Upper","Lower")
  
  irf_data <- NULL
  for (el in elements_to_pull) {
    new_irf_info <- var_irfs[el][[1]]
    for (out in variables) {
      new_irf_var_data <- as.data.frame(new_irf_info[out][[1]])
      new_irf_var_data_long <- new_irf_var_data %>%
        gather(cov, value)
      new_irf_var_data_long$out <- out
      new_irf_var_data_long$day <- rep(1:nrow(new_irf_var_data),
                                       length(unique(new_irf_var_data_long$cov)))
      new_irf_var_data_long$e_type <- el
      irf_data <- rbind(irf_data, new_irf_var_data_long)
    }
  }
  
  irf_data$e_type <- recode(irf_data$e_type,
                            `irf` = "pe",
                            `Lower` = "lwr",
                            `Upper` = "upr")
  
  irf_data_wide <- irf_data %>%
    mutate(value = (value / 10) * 100) %>%   # scale to percentage points
    spread(e_type, value)
  
  # ---- store key effects (day 15) for robustness figure 1 ----
  gender_row <- irf_data_wide %>%
    filter(cov == "Gender_leader", out == "Race_follower", day == 15) %>%
    mutate(lag = lag)
  party_row  <- irf_data_wide %>%
    filter(cov == "Partisan_leader", out == "Nationalism_follower", day == 15) %>%
    mutate(lag = lag)
  
  gendercheck <- rbind(gendercheck, gender_row)
  partycheck  <- rbind(partycheck,  party_row)
  
  # ---- store full results for robustness figure 2 ----
  result_list[[length(result_list) + 1]] <- irf_data_wide
}

robust <- list(gendercheck, partycheck)
dir.create("../datasets/intermediary", showWarnings = FALSE, recursive = TRUE)
save(robust,      file = "../datasets/intermediary/robust.Rdata")
save(result_list, file = "../datasets/intermediary/robustfull.Rdata")

# ========================== ROBUSTNESS PLOTS – KEY EFFECTS =========================
load("../datasets/intermediary/robust.Rdata")

# Prepare labels
robust[[1]][, 2] <- "Race\n(Followers)"
robust[[2]][, 2] <- "Nationalism\n(Followers)"

pdf("../plots/Figure6.pdf", width = 12, height = 6)

p1 <- ggplot(robust[[1]] %>%
               arrange(desc(lag)) %>%
               mutate(x_pos = row_number()),
             aes(y = pe, x = x_pos)) +
  geom_segment(aes(yend = upr, y = lwr, xend = x_pos), size = 1) +
  geom_point(size = 3) +
  scale_y_continuous(limits = c(-0.1, 0.3)) +
  coord_flip() +
  geom_hline(yintercept = 0, color = "red4") +
  facet_wrap(~out, nrow = 1) +
  xlab("Gender\n(Leader)") +
  ylab("") +
  scale_x_continuous(breaks = 1:9, labels = paste0("lag=", 10:2)) +
  theme(
    panel.background = element_blank(),
    panel.grid.major = element_line(colour = "gray90", linetype = "solid"),
    axis.text.y = element_text(size = 16),
    axis.text.x = element_text(size = 16),
    strip.text  = element_text(size = 16),
    panel.border = element_rect(colour = "black", fill = FALSE),
    strip.background = element_rect(colour = "black"),
    axis.title  = element_text(size = 14),
    axis.title.y = element_text(angle = 0, size = 14, vjust = 0.5)
  )

p2 <- ggplot(robust[[2]] %>%
               arrange(desc(lag)) %>%
               mutate(x_pos = row_number()),
             aes(y = pe, x = x_pos)) +
  geom_segment(aes(yend = upr, y = lwr, xend = x_pos), size = 1) +
  geom_point(size = 3) +
  scale_y_continuous(limits = c(-0.5, 1)) +
  coord_flip() +
  geom_hline(yintercept = 0, color = "red4") +
  facet_wrap(~out, nrow = 1) +
  xlab("Partisan\n(Leader)") +
  ylab("") +
  scale_x_continuous(breaks = 1:9, labels = paste0("lag=", 10:2)) +
  theme(
    panel.background = element_blank(),
    panel.grid.major = element_line(colour = "gray90", linetype = "solid"),
    axis.text.y = element_text(size = 16),
    axis.text.x = element_text(size = 16),
    strip.text  = element_text(size = 16),
    panel.border = element_rect(colour = "black", fill = FALSE),
    strip.background = element_rect(colour = "black"),
    axis.title  = element_text(size = 14),
    axis.title.y = element_text(angle = 0, size = 14, vjust = 0.5)
  )

grid.arrange(p1, p2, ncol = 2)
dev.off()

# ========================== ROBUSTNESS PLOTS – ALL EFFECTS =========================
load("../datasets/intermediary/robustfull.Rdata")

combined_plot_db <- data.frame()

for (i in 1:length(result_list)) {  # each element corresponds to a lag in 2:10
  temp_db <- result_list[[i]] %>%
    filter(day == 15) %>%
    mutate(cov = recode(cov,
                        "Gender_follower"      = "Gender\n(Followers)",
                        "Gender_leader"        = "Gender\n(Leaders)",
                        "Nationalism_follower" = "Nationalism\n(Followers)",
                        "Nationalism_leader"   = "Nationalism\n(Leaders)",
                        "Partisan_follower"    = "Partisan\n(Followers)",
                        "Partisan_leader"      = "Partisan\n(Leaders)",
                        "Race_follower"        = "Race\n(Followers)",
                        "Race_leader"          = "Race\n(Leaders)",
                        "Religion_follower"    = "Religion\n(Followers)",
                        "Religion_leader"      = "Religion\n(Leaders)"),
           out = recode(out,
                        "Gender_follower"      = "Gender\n(Followers)",
                        "Gender_leader"        = "Gender\n(Leaders)",
                        "Nationalism_follower" = "Nationalism\n(Followers)",
                        "Nationalism_leader"   = "Nationalism\n(Leaders)",
                        "Partisan_follower"    = "Partisan\n(Followers)",
                        "Partisan_leader"      = "Partisan\n(Leaders)",
                        "Race_follower"        = "Race\n(Followers)",
                        "Race_leader"          = "Race\n(Leaders)",
                        "Religion_follower"    = "Religion\n(Followers)",
                        "Religion_leader"      = "Religion\n(Leaders)")) %>%
    mutate(cov = factor(cov,
                        levels = rev(c("Race\n(Followers)",
                                       "Race\n(Leaders)",
                                       "Nationalism\n(Followers)",
                                       "Nationalism\n(Leaders)",
                                       "Gender\n(Followers)",
                                       "Gender\n(Leaders)",
                                       "Partisan\n(Followers)",
                                       "Partisan\n(Leaders)",
                                       "Religion\n(Followers)",
                                       "Religion\n(Leaders)"))),
           out = factor(out,
                        levels = c("Race\n(Followers)",
                                   "Race\n(Leaders)",
                                   "Nationalism\n(Followers)",
                                   "Nationalism\n(Leaders)",
                                   "Gender\n(Followers)",
                                   "Gender\n(Leaders)",
                                   "Partisan\n(Followers)",
                                   "Partisan\n(Leaders)",
                                   "Religion\n(Followers)",
                                   "Religion\n(Leaders)"))) %>%
    filter(cov %in% c("Race\n(Leaders)",
                      "Nationalism\n(Leaders)",
                      "Gender\n(Leaders)",
                      "Partisan\n(Leaders)",
                      "Religion\n(Leaders)")) %>%
    filter(out %in% c("Race\n(Followers)",
                      "Nationalism\n(Followers)",
                      "Gender\n(Followers)",
                      "Partisan\n(Followers)",
                      "Religion\n(Followers)"))
  
  combined_plot_db <- rbind(combined_plot_db, temp_db)
}

# Reverse order so that x-axis labels match lag=10..2 left-to-right after coord_flip
combined_plot_db <- combined_plot_db[nrow(combined_plot_db):1, ]

combined_plot_db <- combined_plot_db %>%
  group_by(cov, out) %>%
  mutate(estimate_id = row_number()) %>%
  ungroup()

dodge_width <- 0.8

pdf("../plots/FigureA12.pdf", width = 12, height = 7)

ggplot(
  combined_plot_db,
  aes(x = cov, y = pe, ymin = lwr, ymax = upr, group = estimate_id)
) +
  geom_linerange(
    aes(ymin = lwr, ymax = upr, color = "black"),
    position = position_dodge(width = dodge_width),
    size = 1
  ) +
  geom_point(
    aes(color = "black"),
    position = position_dodge(width = dodge_width),
    size = 3
  ) +
  geom_hline(yintercept = 0, color = "red4") +
  facet_wrap(~out, nrow = 1) +
  coord_flip() +
  xlab("") +
  ylab("\n15-day responses (in percentage points)") +
  scale_color_manual(
    name   = "",
    values = c("black" = "black"),
    labels = c("black" = "Point estimate with 95% CI"),
    breaks = "black"
  ) +
  theme(
    legend.position = "none",
    panel.background = element_blank(),
    panel.grid.major = element_line(colour = "gray90", linetype = "solid"),
    axis.text.x = element_text(size = 16),
    axis.text.y = element_text(size = 16),
    strip.text  = element_text(size = 16),
    panel.border = element_rect(colour = "black", fill = FALSE),
    strip.background = element_rect(colour = "black"),
    axis.title  = element_text(size = 14),
    legend.text = element_text(size = 14, margin = margin(t = 20), vjust = 5)
  )

dev.off()
